# Load libraries
library(tidyverse)
library(tidyterra)
library(tidync)
library(ggridges)
library(readxl)
library(janitor)
library(lubridate)
library(sdmTMB)
library(ncdf4)
library(patchwork)
library(terra)
library(viridis)
library(devtools)
library(ggsidekick); theme_set(theme_sleek())
library(crayon)
library(marmap)
library(tidylog)
# Point to wd
home <- here::here()
# Load all custom functions in R/functions
# - map-plot [source_url("https://raw.githubusercontent.com/maxlindmark/cod-interactions/main/R/functions/map-plot.R")]
# - callCopernicusCovariate
# - extractCovariateAtLocation
# Source every file found in the project's R/functions directory
for (fun_path in list.files(file.path(home, "R/functions"),
                            full.names = TRUE)) {
  source(fun_path)
}
Collate larval size data
Explore data
Read and clean data
Old data
# Old data: larval length frequencies from the 1992-2010 MIK workbook.
# Some column headers are on row 19 but most are on row 20, hence skip = 20.
# Latitude/longitude are not taken from this sheet; they come from the trawl
# data that is joined in later.
length_old <- read_excel(
  paste0(home, "/data/larvea/1992_2010 MIK SWE Alla arter.xlsx"),
  sheet = 1,
  skip = 20
) |>
  # standardise column names so the size-class columns can be pivoted
  clean_names() |>
  # size-class columns (x3 ... x130) become rows: name -> length_mm, value -> n
  pivot_longer(cols = x3:x130, names_to = "length_mm", values_to = "n") |>
  dplyr::select(year, day, month, haul, species, n, length_mm) |>
  # strip the "x" prefix and make the size class numeric
  mutate(length_mm = as.numeric(str_remove(length_mm, "x"))) |>
  # NA n means no larvae were recorded in that size group
  # (note that there are also explicit 0s in the data)
  drop_na(n) |>
  filter(n > 0) |>
  # expand the data so that 1 row is 1 individual
  # FIXME: the counts are not integers?! (hence the need for round)
  uncount(round(n)) |>
  dplyr::select(-n) |>
  # unique haul ID used to match against the trawl data
  mutate(haul_id = paste(year, month, day, haul, sep = "_"))
# Read the old trawl data; it supplies haul coordinates and temperature
trawl_old <- read_excel(
  paste0(home, "/data/larvea/1992-2010 MIK SWE Tråldata.xlsx"),
  sheet = 1,
  skip = 8
) |>
  clean_names() |>
  # two rows carry no info at all (not even year), so they are dropped
  drop_na(year) |>
  # same haul ID construction as in the length data
  mutate(haul_id = paste(year, month, day, haul_no, sep = "_")) |>
  # keep the first row per haul ID
  distinct(haul_id, .keep_all = TRUE) |>
  # the last two coordinate columns are decimal degrees of haul position
  dplyr::select(
    haul_id,
    lat = lat_decim_20,
    lon = long_decim_21,
    temp
  )
# Attach haul position and temperature to the old length data and tag the
# rows as belonging to the old period
length_old <- left_join(length_old, trawl_old, by = "haul_id") |>
  mutate(
    period = "old",
    day = as.numeric(day),
    month = as.numeric(month)
  ) |>
  dplyr::select(-haul)
New data
# New data (2008-2024 workbook): one row per measured individual
length_new <- read_excel(
  paste0(home, "/data/larvea/ELDB(s) bara fisk 2008-2024.xlsx")
) |>
  clean_names() |>
  # keep only the columns needed, renaming length -> length_mm on the way
  dplyr::select(haul_id, species, length_mm = length) |>
  drop_na(length_mm)
# New trawl data: haul date, start position and surface temperature
trawl_new <- read_excel(
  paste0(home, "/data/larvea/ELDB 2008-2024.xlsx")
) |>
  clean_names() |>
  # select and rename in one step so names match the old trawl data
  dplyr::select(
    year, day, month, haul_id,
    lat = start_latitud,
    lon = start_longitud,
    temp = sur_temp
  )
# Attach haul date, position and temperature to the new length data.
# The period label must be "new" here: the old data set is labelled "old",
# and using the same label for both would make the column uninformative.
length_new <- length_new |>
  left_join(trawl_new, by = "haul_id") |>
  mutate(
    period = "new",
    # coerce date parts to numeric so they bind cleanly with the old data
    year = as.numeric(year),
    day = as.numeric(day),
    month = as.numeric(month)
  )
Join old and new
# Stack new and old data, derive day of the year, keep rows with lon > 8 and
# a known latitude, keep the observed temperature under its own name, and
# add km UTM coords
d <- bind_rows(length_new, length_old) |>
  mutate(yday = yday(paste(year, month, day, sep = "-"))) |>
  filter(lon > 8) |>
  drop_na(lat) |>
  rename(temp_obs = temp) |>
  add_utm_columns(ll_names = c("lon", "lat"))
Explore data
# Sample size: number of measured individuals per haul, mapped by year.
# X and Y are in km, so they are multiplied by 1000 to match the base map.
plot_map_fc +
  geom_point(
    data = d |>
      group_by(haul_id, Y, X, year) |>
      # drop the grouping explicitly so no grouped tibble reaches ggplot
      summarise(n = n(), .groups = "drop"),
    aes(X * 1000, Y * 1000, color = n),
    size = 0.5
  ) +
  facet_wrap(~year, ncol = 8) +
  scale_color_viridis(trans = "sqrt") +
  ggtitle("Sample size per haul")

# Day of the year (one point per haul)
plot_map_fc +
  geom_point(
    data = d |>
      distinct(haul_id, .keep_all = TRUE),
    aes(X * 1000, Y * 1000, color = yday),
    size = 0.5
  ) +
  facet_wrap(~year, ncol = 8) +
  scale_color_viridis(trans = "sqrt") +
  ggtitle("Day of the year of sampling in space")

# Which dates are sampled?
# geom_bar() counts rows per category; it replaces
# geom_histogram(stat = "count"), which triggers a warning about ignored
# binning parameters. The fill scale was dropped because no fill aesthetic
# is mapped in this plot.
d |>
  ggplot(aes(as.factor(month))) +
  geom_bar()

# Distribution of sampling day within each year
d |>
  ggplot(aes(x = yday, y = as.factor(year), fill = after_stat(x))) +
  scale_fill_viridis(alpha = 0.8, name = "") +
  geom_density_ridges_gradient(alpha = 0.75) +
  theme_facet_map() +
  labs(y = "year")
# Species
sort(unique(d$species)) [1] "aequorea" "agonus cataphractus"
[3] "Agonus cataphractus" "alloteuthis subulata"
[5] "ammodytes marinus" "ammodytidae"
[7] "Ammodytidae" "anarhichas lupus"
[9] "Anarhichas lupus" "anguilla anguilla"
[11] "Anguilla anguilla" "aphia minuta"
[13] "Aphia minuta" "argentina silus"
[15] "Argentina silus" "argentina sphyraena"
[17] "argentina spp" "Argentina spp"
[19] "argentinidae" "arnoglossus laterna"
[21] "Arnoglossus laterna" "branchiostoma lanceolatum"
[23] "Branchiostoma lanceolatum" "buglossidium luteum"
[25] "Buglossidium luteum" "callionymidae"
[27] "Callionymidae" "callionymus lyra"
[29] "Callionymus lyra" "callionymus maculatus"
[31] "Callionymus maculatus" "callionymus reticulatus"
[33] "chirolophis ascanii" "Chirolophis ascanii"
[35] "clupea harengus" "Clupea harengus"
[37] "CLUPEA HARENGUS ADULT" "clupeidae"
[39] "Clupeidae" "cottidae"
[41] "Cottus spp" "crystallogobius linearis"
[43] "Crystallogobius linearis" "Cyclopterus lumpus"
[45] "echiodon drummondi" "enchelyopus cimbrius"
[47] "Enchelyopus cimbrius" "Engraulis encrasicholus"
[49] "engraulis encrasicolus" "entelurus aequoreus"
[51] "Entelurus aequoreus" "eutrigla gurnardus"
[53] "Eutrigla gurnardus" "Gadidae"
[55] "gadus morhua" "Gadus morhua"
[57] "gaidropsarus argentatus" "gasterosteus aculeatus"
[59] "Gasterosteus aculeatus" "glyptocephalus cynoglossus"
[61] "Glyptocephalus cynoglossus" "Gobiidae"
[63] "hippoglossoides platessoides" "Hippoglossoides platessoides"
[65] "Hyperoplus lanceolatus" "illex"
[67] "lebetus scorpioides" "Lebetus scorpioides"
[69] "limanda limanda" "Limanda limanda"
[71] "liparis montagui" "Liparis montagui"
[73] "loligo" "Lumpenus lampretaeformis"
[75] "maurolicus muelleri" "Maurolicus muelleri"
[77] "merlangius merlangus" "Merlangius merlangus"
[79] "merluccius merluccius" "Merluccius merluccius"
[81] "microstomus kitt" "Microstomus kitt"
[83] "molva molva" "Mugilidae"
[85] "myoxocephalus scorpioides" "myoxocephalus scorpius"
[87] "Myoxocephalus scorpius" "Nerophis lumbriciformis"
[89] "Osmerus eperlanus" "pholis gunnellus"
[91] "Pholis gunnellus" "phrynorhombus norvegicus"
[93] "Phrynorhombus norvegicus" "phycidae"
[95] "Phycidae" "physidae"
[97] "pleuronectes platessa" "Pleuronectes platessa"
[99] "pleuronectidae" "Pleuronectidae"
[101] "Pleuronectiformes" "Pollachius pollachius"
[103] "pomatoschistus sp" "Pomatoschistus sp"
[105] "sardina pilchardus" "sepiola atlantica"
[107] "solea solea" "Solea solea"
[109] "sprattus sprattus" "Sprattus sprattus"
[111] "SPRATTUS SPRATTUS ADULT" "syngnathus acus"
[113] "Syngnathus acus" "syngnathus rostellatus"
[115] "Syngnathus rostellatus" "Syngnathus spp"
[117] "syngnathus typhle" "Syngnathus typhle"
[119] "taurulus bubalis" "Taurulus bubalis"
[121] "Trachurus trachurus" "Triglidae"
[123] "Trisopterus esmarkii" "UNIDENTIFIED"
# Clean species names!
# 1) Harmonise capitalisation (the raw data mix lower, sentence and upper
#    case for the same taxon).
# 2) Merge spelling variants that otherwise survive as separate taxa:
#    "Engraulis encrasicholus" / "Engraulis encrasicolus" and
#    "Physidae" / "Phycidae".
# NOTE(review): the two corrections assume these are misspellings of the
# same taxa - confirm with whoever entered the data.
d <- d |>
  mutate(
    species = str_to_sentence(species),
    species = case_when(
      species == "Engraulis encrasicholus" ~ "Engraulis encrasicolus",
      species == "Physidae" ~ "Phycidae",
      TRUE ~ species
    )
  )
sort(unique(d$species)) [1] "Aequorea" "Agonus cataphractus"
[3] "Alloteuthis subulata" "Ammodytes marinus"
[5] "Ammodytidae" "Anarhichas lupus"
[7] "Anguilla anguilla" "Aphia minuta"
[9] "Argentina silus" "Argentina sphyraena"
[11] "Argentina spp" "Argentinidae"
[13] "Arnoglossus laterna" "Branchiostoma lanceolatum"
[15] "Buglossidium luteum" "Callionymidae"
[17] "Callionymus lyra" "Callionymus maculatus"
[19] "Callionymus reticulatus" "Chirolophis ascanii"
[21] "Clupea harengus" "Clupea harengus adult"
[23] "Clupeidae" "Cottidae"
[25] "Cottus spp" "Crystallogobius linearis"
[27] "Cyclopterus lumpus" "Echiodon drummondi"
[29] "Enchelyopus cimbrius" "Engraulis encrasicholus"
[31] "Engraulis encrasicolus" "Entelurus aequoreus"
[33] "Eutrigla gurnardus" "Gadidae"
[35] "Gadus morhua" "Gaidropsarus argentatus"
[37] "Gasterosteus aculeatus" "Glyptocephalus cynoglossus"
[39] "Gobiidae" "Hippoglossoides platessoides"
[41] "Hyperoplus lanceolatus" "Illex"
[43] "Lebetus scorpioides" "Limanda limanda"
[45] "Liparis montagui" "Loligo"
[47] "Lumpenus lampretaeformis" "Maurolicus muelleri"
[49] "Merlangius merlangus" "Merluccius merluccius"
[51] "Microstomus kitt" "Molva molva"
[53] "Mugilidae" "Myoxocephalus scorpioides"
[55] "Myoxocephalus scorpius" "Nerophis lumbriciformis"
[57] "Osmerus eperlanus" "Pholis gunnellus"
[59] "Phrynorhombus norvegicus" "Phycidae"
[61] "Physidae" "Pleuronectes platessa"
[63] "Pleuronectidae" "Pleuronectiformes"
[65] "Pollachius pollachius" "Pomatoschistus sp"
[67] "Sardina pilchardus" "Sepiola atlantica"
[69] "Solea solea" "Sprattus sprattus"
[71] "Sprattus sprattus adult" "Syngnathus acus"
[73] "Syngnathus rostellatus" "Syngnathus spp"
[75] "Syngnathus typhle" "Taurulus bubalis"
[77] "Trachurus trachurus" "Triglidae"
[79] "Trisopterus esmarkii" "Unidentified"
# Number of measured individuals per species and year
d |>
  group_by(year, species) |>
  summarise(n = n(), .groups = "drop") |>
  ggplot(aes(year, n, fill = species)) +
  guides(fill = "none") +
  facet_wrap(~species, scales = "free_y") +
  geom_col()

# Filter species: keep species-year combinations with at least 3 measured
# individuals, then keep species that still have at least 5 such years.
# (The previous header comment said "minimum 5 sizes per year", but the
# threshold actually applied is 3.)
d <- d |>
  group_by(species, year) |>
  # number of individuals per species and year
  mutate(n = n()) |>
  ungroup() |>
  filter(n >= 3) |>
  group_by(species) |>
  # number of years left per species after the filter above
  mutate(n_years = n_distinct(year)) |>
  ungroup() |>
  filter(n_years >= 5) |>
  dplyr::select(-n_years, -n)
Trim data
The cutoff sizes used here mark the transition to post-larvae (after which they metamorphose into juveniles). In some cases, the size distribution overlaps a lot with the post-larval size. In those cases we use the size at which they move from pelagic to benthic habitats (which is often what splits the size distributions). Some species are also not larvae.
sort(unique(d$species)) [1] "Agonus cataphractus" "Ammodytidae"
[3] "Anguilla anguilla" "Aphia minuta"
[5] "Argentina silus" "Argentina spp"
[7] "Chirolophis ascanii" "Clupea harengus"
[9] "Crystallogobius linearis" "Enchelyopus cimbrius"
[11] "Limanda limanda" "Maurolicus muelleri"
[13] "Microstomus kitt" "Myoxocephalus scorpius"
[15] "Pholis gunnellus" "Pomatoschistus sp"
[17] "Sardina pilchardus" "Sprattus sprattus"
[19] "Sprattus sprattus adult" "Syngnathus rostellatus"
[21] "Taurulus bubalis"
# Agonus cataphractus: Trim the 3 data points above the post-larval size of 20 mm
d <- d |> filter(!(species == "Agonus cataphractus" & length_mm > 20))filter: removed 3 rows (<1%), 49,104 rows remaining
# Ammodytidae: Two species; don't forget to add Ammodytes into this group in the data processing script, since identification to species level has likely changed over time! There also appear to be two cohorts. The post-larvae cutoff is at 26-30 mm for the two species, but here perhaps we want to split around 55 mm to separate the clusters?
d |>
filter((species == "Ammodytidae" & length_mm < 57)) |>
ggplot(aes(length_mm)) +
geom_histogram()filter: removed 47,921 rows (98%), 1,183 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Ammodytidae" & length_mm > 58))filter: removed 166 rows (<1%), 48,938 rows remaining
# Anguilla anguilla: Ok
# Aphia minuta: not larvae but can include anyway. Remove the one outlier
d |>
filter(species == "Aphia minuta") |>
ggplot(aes(length_mm)) +
geom_histogram()filter: removed 42,288 rows (86%), 6,650 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Aphia minuta" & length_mm > 60))filter: removed one row (<1%), 48,937 rows remaining
# Argentina silus: Merge with Argentina spp? It seems like identification to species level changed over time. Post-larvae are under 50 mm, which seems fitting as a cutoff size
d |>
filter(species %in% c("Argentina silus", "Argentina spp")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 48,746 rows (>99%), 191 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species %in% c("Argentina silus", "Argentina spp") & length_mm > 50))filter: removed 7 rows (<1%), 48,930 rows remaining
# Chirolophis ascanii: Post-larvae around 20 mm. However, data indicate a size for metamorphosis around 30 mm (since they then settle on the seabed).
# d |>
# filter(species == "Chirolophis ascanii") |>
# ggplot(aes(length_mm)) +
# geom_histogram()
#
# d <- d |> filter(!(species == "Chirolophis ascanii" & length_mm > 30))
# Clupea harengus: Post-larvae until 48-50 mm, seems fitting
d |>
filter(species %in% c("Clupea harengus")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 35,916 rows (73%), 13,014 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Clupea harengus" & length_mm > 50))filter: removed 6 rows (<1%), 48,924 rows remaining
# Crystallogobius linearis: Adult, otherwise seems OK. Malin was going to check something with the gear: are we sampling a different part of the size-distribution now?
# Enchelyopus cimbrius: Potentially different cohorts because spawning time is long and potentially not accurate in the literature given the sizes we observe here. <span style="color:red;">Drop this from the study</span>
d <- d |> filter(!(species == "Enchelyopus cimbrius"))filter: removed 71 rows (<1%), 48,853 rows remaining
# Limanda limanda: Post-larvae may remain pelagic until 30 mm, use that as a cutoff.
d |>
filter(species %in% c("Limanda limanda")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 48,595 rows (99%), 258 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Limanda limanda" & length_mm > 30))filter: removed 11 rows (<1%), 48,842 rows remaining
# Maurolicus muelleri: These are not larvae and not spawning frequently here. <span style="color:red;">Drop this from the study</span>
d <- d |> filter(!(species == "Maurolicus muelleri"))filter: removed 138 rows (<1%), 48,704 rows remaining
# Microstomus kitt: metamorphosis at 18 mm, but still pelagic? we judge that they are pelagic until 40 mm
d |>
filter(species %in% c("Microstomus kitt")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 48,058 rows (99%), 646 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Microstomus kitt" & length_mm > 40))filter: removed 6 rows (<1%), 48,698 rows remaining
# Myoxocephalus scorpius: Pelagic until about 20 mm, use that as a cutoff
d |>
filter(species %in% c("Myoxocephalus scorpius")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 48,157 rows (99%), 541 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Myoxocephalus scorpius" & length_mm > 20))filter: removed 2 rows (<1%), 48,696 rows remaining
# Pholis gunnellus: pelagic until 35 mm, use that as a cutoff
d |>
filter(species %in% c("Pholis gunnellus")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 44,107 rows (91%), 4,589 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
d <- d |> filter(!(species == "Pholis gunnellus" & length_mm > 35))filter: removed 14 rows (<1%), 48,682 rows remaining
# Pomatoschistus sp: Not larvae, could keep for the same reason as other non-larvae species
# Sardina pilchardus: Ok
# Sprattus sprattus: Too many adults because they don't spawn at the right time. If we trim to postlarvae, then we have too few anyway. <span style="color:red;">Drop this from the study</span>
# NOTE(review): this filter only removes rows labelled exactly "Sprattus sprattus"; rows labelled "Sprattus sprattus adult" (present in the species list above) are not matched and stay in d - confirm whether they should be dropped as well.
d <- d |> filter(!species == "Sprattus sprattus")filter: removed 928 rows (2%), 47,754 rows remaining
# Syngnathus rostellatus: No larval stage, can keep for the same reason as other non-larvae
# Taurulus bubalis: 12 mm cutoff (is that post-larva maximum size or start?)
# Histogram of the length distribution for this species
d |>
filter(species %in% c("Taurulus bubalis")) |>
ggplot(aes(length_mm, fill = species)) +
geom_histogram()filter: removed 47,714 rows (>99%), 40 rows remaining
`stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# NOTE(review): the comment above mentions a 12 mm cutoff, but the filter below removes lengths > 35 mm (the same value used for Pholis gunnellus) - confirm which cutoff is intended.
d <- d |> filter(!(species == "Taurulus bubalis" & length_mm > 35))filter: removed one row (<1%), 47,753 rows remaining
# Map of the remaining samples, one panel per species, coloured by year
plot_map_fc +
  geom_point(
    data = d,
    aes(X * 1000, Y * 1000, color = year),
    size = 0.5
  ) +
  facet_wrap(~species, ncol = 6) +
  scale_color_viridis() +
  ggtitle("Samples in space by species and year")

# Number of measured individuals per species and year after trimming
d |>
  group_by(year, species) |>
  summarise(n = n()) |>
  ggplot(aes(year, n, fill = species)) +
  guides(fill = "none") +
  facet_wrap(~species, scales = "free_y", ncol = 5) +
  geom_col() +
  scale_fill_viridis(discrete = TRUE)
group_by: 2 grouping variables (year, species)
summarise: now 403 rows and 3 columns, one group variable remaining (year)
Add covariate to hauls
# Directory holding the covariate files, defined once so later chunks can
# build file paths from it
covPath <- file.path(home, "data/covariates")
Satellite derived temperatures
https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/description
## Load satellite derived SST.
# Source: https://data.marine.copernicus.eu/product/SST_BAL_SST_L4_REP_OBSERVATIONS_010_016/download
# Print details of the NetCDF file. The handle is kept in a variable so it
# can be closed afterwards; print(nc_open(...)) left the file open.
sst_nc <- nc_open(paste(covPath, "sst", "DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc", sep = "/"))
print(sst_nc)
nc_close(sst_nc)
File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/sst/DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_1711802008633.nc (NC_FORMAT_NETCDF4):
1 variables (excluding dimension variables):
float analysed_sst[longitude,latitude,time] (Contiguous storage)
units: kelvin
_FillValue: NaN
standard_name: sea_surface_foundation_temperature
long_name: Analysed sea surface temperature
3 dimensions:
latitude Size:355
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: 48
valid_max: 66
longitude Size:342
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:1462
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
11 global attributes:
Conventions: CF-1.11
title: Baltic Sea SST analysis, daily reprocessed level 4 analysis
institution: Danish Meteorological Institute, DMI
source: ESA SST CCI, C3S and CMEMS FMI and SMHI Sea ice concentration
history: Version 1.0
references: Høyer, J. L. and She, J., Optimal interpolation of sea surface temperature for the North Sea and Baltic Sea, J. Mar. Sys., Vol 65, 1-4, pp. 176-189, 2007, Høyer, J. L., Le Borgne, P., & Eastwood, S. (2014). A bias correction method for Arctic satellite sea surface temperature observations. Remote Sensing of Environment, 146, 201-213.
comment: IN NO EVENT SHALL DMI OR ITS REPRESENTATIVES BE LIABLE FOR ANY DAMAGES WHATSOEVER INCLUDING, WITHOUT LIMITATION, SPECIAL, INDIRECT, INCIDENTAL OR CONSEQUENTIAL DAMAGES OR DAMAGES FOR LOSS OF BUSINESS PROFITS OR SAVINGS, BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE BUSINESS INTERRUPTION, LOSS OF BUSINESS INFORMATION OR OTHER PECUNIARY LOSS ARISING OUT OF THE USE OF OR THE INABILITY TO USE THIS DMI PRODUCT, EVEN IF DMI HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. THIS LIMITATION SHALL APPLY TO CLAIMS OF PERSONAL INJURY TO THE EXTENT PERMITTED BY LAW. SOME COUNTRIES OR STATES DO NOT ALLOW THE EXCLUSION OR LIMITATION OF LIABILITY FOR CONSEQUENTIAL, SPECIAL, INDIRECT, INCIDENTAL DAMAGES AND, ACCORDINGLY, SOME PORTIONS OF THESE LIMITATIONS MAY NOT APPLY TO YOU. BY USING THIS PRODUCT, YOU HAVE ACCEPTED THAT THE ABOVE LIMITATIONS OR THE MAXIMUM LEGALLY APPLICABLE SUBSET OF THESE LIMITATIONS APPLY TO YOUR USE OF THIS PRODUCT. WARNING Some applications are unable to properly handle signed bytevalues. If values are encountered > 127, please subtract 256 from this reported value
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: SST_BAL_SST_L4_REP_OBSERVATIONS_010_016
subset:datasetId: DMI_BAL_SST_L4_REP_OBSERVATIONS_010_016_202012
subset:date: 2024-03-30T12:33:28.633Z
# Load and gather the temperature data in a tibble
temp_tibble <- callCopernicusCovariate("sst", messages = 1) Processing - Gathering data from df 1/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 2/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 3/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 4/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 5/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 6/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 7/8.
mutate: new variable 'date' (double) with 1,462 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 56,462,847 rows (91%), 5,278,875 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 211,155 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 8/8.
mutate: new variable 'date' (double) with 1,521 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 5 unique values and 0% NA
filter: removed 69,342,258 rows (90%), 7,868,265 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 253,815 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Note - The data were filtered internally for January month
Completed
# Visualize temperature frequency distribution
hist(temp_tibble$sst)# Visualize temperature spatial distribution
# plot_map +
# geom_point(data = temp_tibble,
# aes(X*1000, Y*1000, color = sst))
# Obtain temporal availability, this will be the temporal window to filter the data
unique(temp_tibble$year) [1] 1991 1992 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005
[16] 2006 2007 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020
[31] 2021 2022 2023
# Keep only the years we have temperature for (again, annoying! Fix the temperatures later)
d <- d |>
filter(year %in% unique(temp_tibble$year))filter: removed 584 rows (1%), 47,169 rows remaining
# Extract the covariate at the data locations and add it to d as a new column.
# extractCovariateAtLocation() is a project helper sourced from R/functions;
# judging by its log output it processes the data one year at a time.
d <- extractCovariateAtLocation(
"sst", # Name of the covariate to extract. One of: sst, chlorophyll, depth.
d, # A df containing the set of years and locations to be evaluated.
temp_tibble, # A df containing the covariate at location
changesYearly = 1, # Is the covariate time variant (e.g. temp) or not (e.g. depth)
"temp",# Name to give to the covariate evaluated at location in the df
messages = 1 # Flag for progress messages; presumably 1 = print, 0 = silent - TODO confirm
)Processing - Gathering covariate information at location for year 1/31:1992.
filter: removed 41,917 rows (89%), 5,252 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 2/31:1993.
filter: removed 44,766 rows (95%), 2,403 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 3/31:1994.
filter: removed 45,515 rows (96%), 1,654 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 4/31:1995.
filter: removed 45,537 rows (97%), 1,632 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 5/31:1996.
filter: removed 46,903 rows (99%), 266 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 6/31:1997.
filter: removed 46,730 rows (99%), 439 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 7/31:1998.
filter: removed 45,512 rows (96%), 1,657 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 8/31:1999.
filter: removed 44,254 rows (94%), 2,915 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 9/31:2000.
filter: removed 45,991 rows (98%), 1,178 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 10/31:2001.
filter: removed 45,141 rows (96%), 2,028 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 11/31:2002.
filter: removed 45,400 rows (96%), 1,769 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 12/31:2003.
filter: removed 46,377 rows (98%), 792 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 13/31:2004.
filter: removed 45,873 rows (97%), 1,296 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 14/31:2005.
filter: removed 44,950 rows (95%), 2,219 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 15/31:2006.
filter: removed 45,983 rows (97%), 1,186 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 16/31:2007.
filter: removed 44,769 rows (95%), 2,400 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 17/31:2008.
filter: removed 45,406 rows (96%), 1,763 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 18/31:2009.
filter: removed 43,407 rows (92%), 3,762 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 19/31:2010.
filter: removed 46,467 rows (99%), 702 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 20/31:2012.
filter: removed 45,937 rows (97%), 1,232 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 21/31:2013.
filter: removed 45,941 rows (97%), 1,228 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 22/31:2014.
filter: removed 45,924 rows (97%), 1,245 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 23/31:2015.
filter: removed 46,297 rows (98%), 872 rows remaining
filter: removed 1,647,438 rows (95%), 84,462 rows remaining
Processing - Gathering covariate information at location for year 24/31:2016.
filter: removed 46,405 rows (98%), 764 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 25/31:2017.
filter: removed 46,034 rows (98%), 1,135 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 26/31:2018.
filter: removed 46,238 rows (98%), 931 rows remaining
filter: removed 1,689,669 rows (98%), 42,231 rows remaining
Processing - Gathering covariate information at location for year 27/31:2019.
filter: removed 46,219 rows (98%), 950 rows remaining
filter: removed 1,638,906 rows (95%), 92,994 rows remaining
Processing - Gathering covariate information at location for year 28/31:2020.
filter: removed 46,122 rows (98%), 1,047 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 29/31:2021.
filter: removed 46,313 rows (98%), 856 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 30/31:2022.
filter: removed 46,377 rows (98%), 792 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Processing - Gathering covariate information at location for year 31/31:2023.
filter: removed 46,365 rows (98%), 804 rows remaining
filter: removed 1,681,137 rows (97%), 50,763 rows remaining
Completed
Satellite derived chlorophyll abundance
https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/description
## Load satellite derived chlorophyll
# Source: https://data.marine.copernicus.eu/product/GLOBAL_MULTIYEAR_BGC_001_029/download
# Print details of the NetCDF file. The handle is kept in a variable so it
# can be closed afterwards; print(nc_open(...)) left the file open.
chl_nc <- nc_open(paste(covPath, "chlorophyll", "cmems_mod_glo_bgc_my_0.25_P1D-m_1713795613611_01012017_12312022.nc", sep = "/"))
print(chl_nc)
nc_close(chl_nc)
File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/chlorophyll/cmems_mod_glo_bgc_my_0.25_P1D-m_1713795613611_01012017_12312022.nc (NC_FORMAT_NETCDF4):
6 variables (excluding dimension variables):
float chl[longitude,latitude,depth,time] (Contiguous storage)
units: mg m-3
_FillValue: NaN
standard_name: mass_concentration_of_chlorophyll_a_in_sea_water
long_name: Total Chlorophyll
float no3[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_nitrate_in_sea_water
long_name: Nitrate
float nppv[longitude,latitude,depth,time] (Contiguous storage)
units: mg m-3 day-1
_FillValue: NaN
standard_name: net_primary_production_of_biomass_expressed_as_carbon_per_unit_volume_in_sea_water
long_name: Total Primary Production of Phyto
float o2[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_dissolved_molecular_oxygen_in_sea_water
long_name: Dissolved Oxygen
float po4[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_phosphate_in_sea_water
long_name: Phosphate
float si[longitude,latitude,depth,time] (Contiguous storage)
units: mmol m-3
_FillValue: NaN
standard_name: mole_concentration_of_silicate_in_sea_water
long_name: Dissolved Silicate
4 dimensions:
depth Size:1
standard_name: depth
long_name: Depth
units: m
unit_long: Meters
axis: Z
positive: down
valid_min: 0.505760014057159
valid_max: 5902.05810546875
latitude Size:13
standard_name: latitude
long_name: Latitude
units: degrees_north
unit_long: Degrees North
axis: Y
valid_min: -80
valid_max: 90
longitude Size:21
standard_name: longitude
long_name: Longitude
units: degrees_east
unit_long: Degrees East
axis: X
time Size:2191
standard_name: time
long_name: Time
units: seconds since 1970-01-01 00:00:00
calendar: gregorian
axis: T
12 global attributes:
Conventions: CF-1.11
title: Daily mean fields for product GLOBAL_REANALYSIS_BIO_001_029
institution: Mercator Ocean
producer: CMEMS - Global Monitoring and Forecasting Centre
source: MERCATOR FREEBIORYS2V4
credit: E.U. Copernicus Marine Service Information (CMEMS)
contact: servicedesk.cmems@mercator-ocean.eu
references: http://marine.copernicus.eu
subset:source: ARCO data downloaded from the Marine Data Store using the MyOcean Data Portal
subset:productId: GLOBAL_MULTIYEAR_BGC_001_029
subset:datasetId: cmems_mod_glo_bgc_my_0.25_P1D-m_202112
subset:date: 2024-04-22T14:20:13.611Z
# Load and gather the chlorophyll data in a tibble
chl_tibble <- callCopernicusCovariate("chlorophyll", messages = 1) Processing - Gathering data from df 1/4.
mutate: new variable 'date' (double) with 2,191 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 6 unique values and 0% NA
filter: removed 276,690 rows (92%), 25,668 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 828 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 2/4.
mutate: new variable 'date' (double) with 2,922 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 8 unique values and 0% NA
filter: removed 369,012 rows (92%), 34,224 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 1,104 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 3/4.
mutate: new variable 'date' (double) with 3,288 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 9 unique values and 0% NA
filter: removed 415,242 rows (92%), 38,502 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 1,242 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Processing - Gathering data from df 4/4.
mutate: new variable 'date' (double) with 2,556 unique values and 0% NA
mutate: new variable 'month' (double) with 12 unique values and 0% NA
new variable 'day' (integer) with 31 unique values and 0% NA
new variable 'year' (double) with 7 unique values and 0% NA
filter: removed 322,782 rows (92%), 29,946 rows remaining
group_by: 3 grouping variables (year, longitude, latitude)
summarise: now 966 rows and 4 columns, 2 group variables remaining (year, longitude)
ungroup: no grouping variables
Note - The data were filtered internally for January month
Completed
# Visualize chlorophyll frequency distribution
hist(chl_tibble$chl)# Visualize chlorophyll spatial distribution
# plot_map +
# geom_point(data = chl_tibble,
# aes(X*1000, Y*1000, color = chl))
# Obtain temporal availability, this will be the temporal window to filter the data
sort(unique(chl_tibble$year)) [1] 1993 1994 1995 1996 1997 1998 1999 2000 2001 2002 2003 2004 2005 2006 2007
[16] 2008 2009 2010 2011 2012 2013 2014 2015 2016 2017 2018 2019 2020 2021 2022
# Keep only the years for which chlorophyll data are available.
d <- d |>
filter(year %in% unique(chl_tibble$year)) # We lose 13% of the data by including chl. filter: removed 6,056 rows (13%), 41,113 rows remaining
# Loop through all year combos, extract the chl at the data locations
# (the log printed below shows one pass per year present in d).
# NOTE(review): extractCovariateAtLocation() is a project helper sourced from
# R/functions; the argument notes here are inferred from this script's calls —
# confirm against its definition.
d <- extractCovariateAtLocation(
"chl", # name of the covariate to extract
d, # data frame holding the sampling locations
chl_tibble, # chlorophyll table built by callCopernicusCovariate() above
changesYearly = 1, # presumably flags a covariate that is matched per year — TODO confirm
"chl", # unnamed here; presumably the output column name (the depth call passes nametocov =) — TODO confirm
messages = 1
)Processing - Gathering covariate information at location for year 1/29:1993.
filter: removed 38,710 rows (94%), 2,403 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 2/29:1994.
filter: removed 39,459 rows (96%), 1,654 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 3/29:1995.
filter: removed 39,481 rows (96%), 1,632 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 4/29:1996.
filter: removed 40,847 rows (99%), 266 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 5/29:1997.
filter: removed 40,674 rows (99%), 439 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 6/29:1998.
filter: removed 39,456 rows (96%), 1,657 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 7/29:1999.
filter: removed 38,198 rows (93%), 2,915 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 8/29:2000.
filter: removed 39,935 rows (97%), 1,178 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 9/29:2001.
filter: removed 39,085 rows (95%), 2,028 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 10/29:2002.
filter: removed 39,344 rows (96%), 1,769 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 11/29:2003.
filter: removed 40,321 rows (98%), 792 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 12/29:2004.
filter: removed 39,817 rows (97%), 1,296 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 13/29:2005.
filter: removed 38,894 rows (95%), 2,219 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 14/29:2006.
filter: removed 39,927 rows (97%), 1,186 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 15/29:2007.
filter: removed 38,713 rows (94%), 2,400 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 16/29:2008.
filter: removed 39,350 rows (96%), 1,763 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 17/29:2009.
filter: removed 37,351 rows (91%), 3,762 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 18/29:2010.
filter: removed 40,411 rows (98%), 702 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 19/29:2012.
filter: removed 39,881 rows (97%), 1,232 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 20/29:2013.
filter: removed 39,885 rows (97%), 1,228 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 21/29:2014.
filter: removed 39,868 rows (97%), 1,245 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 22/29:2015.
filter: removed 40,241 rows (98%), 872 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 23/29:2016.
filter: removed 40,349 rows (98%), 764 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 24/29:2017.
filter: removed 39,978 rows (97%), 1,135 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 25/29:2018.
filter: removed 40,182 rows (98%), 931 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 26/29:2019.
filter: removed 40,163 rows (98%), 950 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 27/29:2020.
filter: removed 40,066 rows (97%), 1,047 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 28/29:2021.
filter: removed 40,257 rows (98%), 856 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Processing - Gathering covariate information at location for year 29/29:2022.
filter: removed 40,321 rows (98%), 792 rows remaining
filter: removed 4,002 rows (97%), 138 rows remaining
Completed
Satellite derived depth
https://emodnet.ec.europa.eu/geoviewer
## Load satellite derived depth
# Source: https://emodnet.ec.europa.eu/geoviewer
# Print details
print(nc_open(paste(covPath, "depth", "Mean depth natural colour (with land).nc", sep = "/")))File /Users/maxlindmark/Dropbox/Max work/R/larval-sizes/data/covariates/depth/Mean depth natural colour (with land).nc (NC_FORMAT_CLASSIC):
1 variables (excluding dimension variables):
float elevation[longitude,latitude]
_FillValue: 0
long_name: Sea-floor height (above Lowest Astronomical Tide datum) {bathymetric height}
standard_name: sea_surface_height
units: m
2 dimensions:
latitude Size:8046
_CoordinateAxisType: Lat
actual_range: 52.7786458333331
actual_range: 61.1588541666588
axis: Y
ioos_category: Location
long_name: Latitude
standard_name: latitude
units: degrees_north
longitude Size:15764
_CoordinateAxisType: Lon
actual_range: 8.76406250000035
actual_range: 25.1755208333316
axis: X
ioos_category: Location
long_name: Longitude
standard_name: longitude
units: degrees_east
29 global attributes:
cdm_data_type: Grid
comment: Uses attributes recommended by https://cfconventions.org
Conventions: CF-1.8, COARDS, ACDD-1.3
creator_name: VLIZ
creator_type: institution
creator_url: https://vliz.be
description: EMODNET Bathymetry DTM 2020
Easternmost_Easting: 25.1755208333316
geospatial_lat_max: 61.1588541666588
geospatial_lat_min: 52.7786458333331
geospatial_lat_units: degrees_north
geospatial_lon_max: 25.1755208333316
geospatial_lon_min: 8.76406250000035
geospatial_lon_units: degrees_east
history: source of the data can be found in the dataset or in the documentation available from http://www.emodnet-bathymetry.eu/
2023-03-07T11:36:35Z (local files)
2023-03-07T11:36:35Z https://erddap.emodnet.eu/erddap/griddap/dtm_2020_v2_e0bf_e7e4_5b8f.nc?elevation[(52.779008670858346):1:(61.15886095654737)][(8.763920325839974):1:(25.17567255317558)]
infoUrl: https://vliz.be
institution: VLIZ
keywords: above, astronomical, bathymetric, bathymetry, data, datum, dtm, earth, Earth Science > Oceans > Sea Surface Topography > Sea Surface Height, elevation, emodnet, floor, full, height, lowest, ocean, oceans, science, sea, sea-floor, sea_surface_height, surface, tide, topography, vliz
keywords_vocabulary: GCMD Science Keywords
license: The data may be used and redistributed for free but is not intended
for legal use, since it may contain inaccuracies. Neither the data
Contributor, ERD, NOAA, nor the United States Government, nor any
of their employees or contractors, makes any warranty, express or
implied, including warranties of merchantability and fitness for a
particular purpose, or assumes any legal liability for the accuracy,
completeness, or usefulness, of this information.
Northernmost_Northing: 61.1588541666588
references: https://vliz.be
source: source of the data can be found in the dataset or in the documentation available from http://www.emodnet-bathymetry.eu/
sourceUrl: (local files)
Southernmost_Northing: 52.7786458333331
standard_name_vocabulary: CF Standard Name Table v70
summary: EMODNET Bathymetry DTM 2020. VLIZ data from a local source.
title: EMODNET Bathymetry DTM 2020 full
Westernmost_Easting: 8.76406250000035
# Load depth data
dep_raster <- terra::rast(paste0(home, "/data/covariates/depth/Mean depth natural colour (with land).nc"))
# See depth extension
plot(dep_raster)## +++++++++++++++++++++++++++++++++++++++++++++++++
## The raster is shifted towards the Baltic and excludes
## some of our points. Let's get depth another way.
## +++++++++++++++++++++++++++++++++++++++++++++++++
## Generate a depth box containing the bathymetries.
depth_box <- getNOAA.bathy(min(d$lon) - .1, max(d$lon) + .1, min(d$lat) - .1, max(d$lat) + .1)Querying NOAA database ...
This may take seconds to minutes, depending on grid size
Building bathy matrix ...
# Visualize depth frequency distribution
hist(dep_raster)
# Extract depth at locations
# NOTE(review): extractCovariateAtLocation() is a project helper sourced from
# R/functions; the argument notes below are inferred from how it is called in
# this script — confirm against its definition.
d <- extractCovariateAtLocation(
"elevation", # Name of the covariate to extract (here the `elevation` variable of the depth NetCDF; the chlorophyll call passes "chl").
d, # A df containing the set locations to be evaluated.
dep_raster, # depth raster loaded above with terra::rast()
changesYearly = 0, # presumably flags a covariate that does not vary by year (the chlorophyll call uses 1) — TODO confirm
nametocov = "depth2", # presumably the name of the new column in d; `depth2` is what gets plotted right after — TODO confirm
messages = 1
)
# The depth data is shifted towards the Baltic, leading to non assignment of depths.
plot_map +
geom_point(data = d,
aes(X*1000, Y*1000, color = depth2))
## +++++++++++++++++++++++++++++++++++++++++++++++++
## The raster is shifted towards the Baltic and excludes
## some of our points. Let's get depth another way,
## i.e. using the marmap package (NOAA bathymetry) as the source.
## +++++++++++++++++++++++++++++++++++++++++++++++++
## Obtain depth at the sampling locations from the NOAA bathymetry (marmap).
## `locator = FALSE` makes get.depth() look up the supplied coordinates rather
## than wait for interactive clicks on a plot; it is spelled out as FALSE
## because `F` is an ordinary, reassignable variable.
## The depth column is assigned directly instead of cbind-ing it onto d, so
## that re-running this chunk overwrites the column rather than appending a
## duplicate `depth` column, and d keeps its class.
## The sign is flipped so that depths are expressed as positive values.
d$depth <- get.depth(
  depth_box,
  x = d$lon,
  y = d$lat,
  locator = FALSE
)$depth * (-1)
# Check
plot_map +
geom_point(data = d,
aes(X*1000, Y*1000, color = depth))Check covariates
# Get the proportion of observations not assigned with a covariate value at prior steps
colMeans(is.na(d)) haul_id species length_mm year day month
0.000000000 0.000000000 0.000000000 0.000000000 0.000000000 0.000000000
lat lon temp_obs period yday X
0.000000000 0.000000000 0.396395301 0.000000000 0.000000000 0.000000000
Y temp chl
0.000000000 0.002359351 0.002480967
Plot response variables
d |>
summarise(n = n(), .by = species) |>
arrange(desc(n))# A tibble: 18 × 2
species n
<chr> <int>
1 Crystallogobius linearis 11360
2 Clupea harengus 10282
3 Aphia minuta 5856
4 Pholis gunnellus 4217
5 Pomatoschistus sp 1952
6 Sprattus sprattus adult 1382
7 Syngnathus rostellatus 1136
8 Chirolophis ascanii 1114
9 Ammodytidae 1092
10 Microstomus kitt 622
11 Myoxocephalus scorpius 518
12 Sardina pilchardus 474
13 Agonus cataphractus 385
14 Anguilla anguilla 268
15 Limanda limanda 247
16 Argentina silus 92
17 Argentina spp 77
18 Taurulus bubalis 39
# Distribution of data: histogram of larval length, one panel per species
ggplot(data = d, mapping = aes(x = length_mm)) +
  geom_histogram() +
  facet_wrap(vars(species), scales = "free")

# Effect of day of the year
ggplot(data = d, mapping = aes(x = yday, y = length_mm)) +
  geom_point(alpha = 0.4, size = 0.4) +
  geom_smooth(method = "lm") +
  facet_wrap(vars(species), scales = "free")

# Effect of year
ggplot(data = d, mapping = aes(x = year, y = length_mm)) +
  geom_point(alpha = 0.4, size = 0.4) +
  geom_smooth(method = "lm") +
  facet_wrap(vars(species), scales = "free")

# Effect of temperature
ggplot(data = d, mapping = aes(x = temp, y = length_mm)) +
  geom_point(alpha = 0.4, size = 0.4) +
  geom_smooth(method = "lm") +
  # geom_smooth() +
  facet_wrap(vars(species), scales = "free")

# Effect of chlorophyll
ggplot(data = d, mapping = aes(x = chl, y = length_mm)) +
  geom_point(alpha = 0.4, size = 0.4) +
  geom_smooth(method = "lm") +
  # geom_smooth() +
  facet_wrap(vars(species), scales = "free")

# Effect of depth [source: Copernicus]
# ggplot(d, aes(depth2, length_mm)) +
# geom_point(size = 0.4, alpha = 0.4) +
# geom_smooth(method = "lm") +
# #geom_smooth() +
# facet_wrap(~species, scales = "free")
#
# # Effect of depth [source: NOAA]
# ggplot(d, aes(depth, length_mm)) +
# geom_point(size = 0.4, alpha = 0.4) +
# geom_smooth(method = "lm") +
# #geom_smooth() +
# facet_wrap(~species, scales = "free")# d |>
# group_by(species) |>
# mutate(sd = sd(length_mm),
# mean = mean(length_mm)) |>
# ungroup() |>
# mutate(outlier = ifelse(length_mm > mean - 4*sd & length_mm < mean + 4*sd,
# "No", "Yes")) |>
# ggplot(aes(year, length_mm, color = outlier)) +
# facet_wrap(~species, scales = "free") +
# geom_point()
#
# d2 <- dSave data
d <- d |> drop_na(temp)drop_na: removed 97 rows (<1%), 41,016 rows remaining
write_csv(d, paste0(home, "/data/clean/larval_size.csv"))